%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/


%% Created for William Dabney at 2012-12-15 14:01:48 -0500 


%% Saved with string encoding Unicode (UTF-8) 



% ICASSP is a conference, so this is a proceedings paper with the venue in Booktitle.
@inproceedings{autostep,
	Author = {Ashique Rupam Mahmood and Richard S. Sutton and Thomas Degris and Patrick M. Pilarski},
	Booktitle = {International Conference on Acoustics, Speech, and Signal Processing},
	Date-Added = {2012-12-15 19:01:43 +0000},
	Date-Modified = {2012-12-15 19:01:43 +0000},
	Title = {Tuning-Free Step-Size Adaptation},
	Year = {2012}}

% The math-bearing term in the title is braced so sentence-casing styles cannot
% lowercase it; editor initials are separated by spaces so they abbreviate correctly.
@incollection{Konidaris2011b,
	Author = {G. D. Konidaris and S. Niekum and P. S. Thomas},
	Booktitle = {Advances in Neural Information Processing Systems 24},
	Editor = {J. Shawe-Taylor and R. S. Zemel and P. Bartlett and F. C. N. Pereira and K. Q. Weinberger},
	Pages = {2402--2410},
	Title = {{TD$_\gamma$}: Re-evaluating Complex Backups in Temporal Difference Learning},
	Year = {2011}}

% The Journal of Machine Learning Research is a journal, so this is an article
% entry with the venue in Journal rather than Booktitle.
@article{Bergstra2012,
	Author = {J. Bergstra and Y. Bengio},
	Journal = {Journal of Machine Learning Research},
	Title = {Random search for hyper-parameter optimization},
	Year = {2012}}

@misc{Slate1991,
  author      = {D. Slate},
  title       = {{UCI} Machine Learning Repository},
  institution = {University of California, Irvine, School of Information and Computer Sciences},
  year        = {1991},
  url         = {http://archive.ics.uci.edu/ml},
  bdsk-url-1  = {http://archive.ics.uci.edu/ml},
}

@misc{Frank+Asuncion:2010,
  author      = {A. Frank and A. Asuncion},
  title       = {{UCI} Machine Learning Repository},
  institution = {University of California, Irvine, School of Information and Computer Sciences},
  year        = {2010},
  url         = {http://archive.ics.uci.edu/ml},
  bdsk-url-1  = {http://archive.ics.uci.edu/ml},
}

% The algorithm name and its math argument are braced as one unit so that
% sentence-casing styles leave them intact.
@inproceedings{Thomas2012b,
	Author = {Anonymous},
	Booktitle = {Proceedings of the Conference on Uncertainty in Artificial Intelligence},
	Note = {Submitted},
	Title = {Natural Actor-Critic using {Sarsa$(\lambda)$}},
	Year = {2012}}

@article{Bertsekas2000,
  author  = {D. P. Bertsekas and J. N. Tsitsiklis},
  title   = {Gradient convergence in gradient methods},
  journal = {SIAM J. Optim.},
  volume  = {10},
  pages   = {627--642},
  year    = {2000},
}

@unpublished{Florian2007,
  author = {R. V. Florian},
  title  = {Correct equations for the dynamics of the cart-pole system},
  note   = {Center for Cognitive and Neural Studies, viewed January 2012},
  year   = {2007},
}

@inproceedings{Schwartz1993,
  author    = {A. Schwartz},
  title     = {A reinforcement learning method for maximizing undiscounted rewards},
  booktitle = {Proceedings of the Tenth International Conference on Machine Learning},
  pages     = {298--305},
  year      = {1993},
}

% Page range uses a double hyphen so it typesets as an en dash.
@article{Bradtke1996,
	Author = {S. J. Bradtke and A. G. Barto},
	Journal = {Machine Learning},
	Pages = {33--57},
	Title = {Linear Least-Squares Algorithms for Temporal Difference Learning},
	Volume = {22},
	Year = {1996}}

@inproceedings{Bagnell2003,
  author    = {J. A. Bagnell and J. Schneider},
  title     = {Covariant policy search},
  booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence},
  pages     = {1019--1024},
  year      = {2003},
}

@article{Barto1983,
  author  = {A. G. Barto and R. S. Sutton and C. W. Anderson},
  title   = {Neuronlike adaptive elements that can solve difficult learning control problems},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = {13},
  number  = {5},
  pages   = {834--846},
  year    = {1983},
}

@inproceedings{Atkeson1994,
  author    = {C. G. Atkeson},
  title     = {Using local trajectory optimizers to speed up global optimization in dynamic programming},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {663--670},
  year      = {1994},
}

@inproceedings{Zhao2011,
  author    = {T. Zhao and H. Hachiya and G. Niu and M. Sugiyama},
  title     = {Analysis and Improvement of Policy Gradient Estimation},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2011},
}

@inproceedings{Maei2010,
  author    = {H. R. Maei and R. S. Sutton},
  title     = {{GQ$(\lambda)$}: {A} general gradient algorithm for temporal-difference prediction learning with eligibility traces},
  booktitle = {Proceedings of the Third Conference on Artificial General Intelligence},
  pages     = {91--96},
  year      = {2010},
}

@inproceedings{Kober2008,
  author    = {J. Kober and J. Peters},
  title     = {Policy search for motor primitives in robotics},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2008},
}

@inproceedings{Kober2009,
  author    = {J. Kober and J. Peters},
  title     = {Learning Motor Primitives for Robotics},
  booktitle = {IEEE International Conference on Robotics and Automation},
  year      = {2009},
}

@article{Sehnke2010,
  author  = {F. Sehnke and C. Osendorfer and T. Ruckstiess and A. Graves and J. Peters and J. Schmidhuber},
  title   = {Parameter-exploring policy gradients},
  journal = {Neural Networks},
  volume  = {23},
  number  = {4},
  pages   = {551--559},
  year    = {2010},
}

@article{Hinton2002,
  author  = {G. E. Hinton},
  title   = {Training products of experts by minimizing contrastive divergence},
  journal = {Neural Computation},
  volume  = {14},
  number  = {8},
  pages   = {1771--1800},
  year    = {2002},
}

@article{Dominici2011,
  author  = {N. Dominici and Y. P. Ivanenko and G. Cappellini and A. d'Avella and V. Mondi and M. Cicchese and A. Fabiano and T. Silei and A. Di Paolo and C. Giannini and R. E. Poppele and F. Lacquaniti},
  title   = {Locomotor primitives in newborn babies and their development},
  journal = {Science},
  volume  = {334},
  pages   = {997--999},
  year    = {2011},
}

@inproceedings{Alessandro2012,
  author    = {C. Alessandro and F. Nori},
  title     = {Identification of Synergies by Optimization of Trajectory Tracking Tasks},
  booktitle = {The Fourth IEEE RAS/EMBS International Conference on Biomedical Robotics and Biomechatronics},
  year      = {2012},
}

@article{Jacobs1991,
  author  = {R. A. Jacobs and M. I. Jordan and S. J. Nowlan and G. E. Hinton},
  title   = {Adaptive mixtures of local experts},
  journal = {Neural Computation},
  volume  = {3},
  number  = {1},
  pages   = {79--87},
  year    = {1991},
}

@inproceedings{Boyan1999,
  author    = {J. Boyan},
  title     = {Least-squares temporal difference learning},
  booktitle = {Proceedings of the Sixteenth International Conference on Machine Learning},
  pages     = {49--56},
  year      = {1999},
}

@inproceedings{Thrun1995,
  author    = {S. Thrun and A. Schwartz},
  title     = {Finding Structure in Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {385--392},
  year      = {1995},
}

% Journal name completed to match the spelling used by other entries in this
% file ("Journal of Machine Learning Research").
@article{Theodorou2010,
	Author = {E. A. Theodorou and J. Buchli and S. Schaal},
	Journal = {Journal of Machine Learning Research},
	Pages = {3137--3181},
	Title = {A Generalized Path Integral Control Approach to Reinforcement Learning},
	Volume = {11},
	Year = {2010}}

% Page range uses a double hyphen so it typesets as an en dash.
@article{Robbins1951,
	Author = {H. Robbins and S. Monro},
	Journal = {Annals of Mathematical Statistics},
	Number = {3},
	Pages = {400--407},
	Title = {A Stochastic Approximation Method},
	Volume = {22},
	Year = {1951}}

% The "Q" in Q-learning is protected so sentence-casing styles do not lowercase it.
@inproceedings{Szepesvari1997,
	Author = {C. S. Szepesvari},
	Booktitle = {Advances in Neural Information Processing Systems},
	Pages = {1064--1070},
	Title = {The Asymptotic Convergence-Rate of {Q}-learning},
	Volume = {10},
	Year = {1997}}

@article{Spall2000,
  author  = {J. C. Spall},
  title   = {Adaptive Stochastic Approximation by the Simultaneous Perturbation Method},
  journal = {IEEE Transactions on Automatic Control},
  volume  = {45},
  number  = {10},
  pages   = {1839--1853},
  year    = {2000},
}

@article{Kiefer1952,
  author  = {J. Kiefer and J. Wolfowitz},
  title   = {Stochastic Estimation of the Maximum of a Regression Function},
  journal = {Annals of Mathematical Statistics},
  volume  = {23},
  number  = {3},
  pages   = {462--466},
  year    = {1952},
}

% Initials are separated by spaces so BibTeX sees each given-name token and
% abbreviating styles keep both initials.
@article{Polyak1992,
	Author = {B. T. Polyak and A. B. Juditsky},
	Journal = {SIAM Journal on Control and Optimization},
	Number = {4},
	Pages = {838--855},
	Title = {Acceleration of Stochastic Approximation by Averaging},
	Volume = {30},
	Year = {1992}}

% IEEE Transactions on Biomedical Engineering is a journal, so this is an
% article entry with the venue in Journal.
@article{Chadwick2009,
	Author = {E. K. Chadwick and D. Blana and A. J. van den Bogert and R. F. Kirsch},
	Journal = {IEEE Transactions on Biomedical Engineering},
	Pages = {941--948},
	Title = {A real-time 3-{D} musculoskeletal model for dynamic simulation of arm movements},
	Volume = {56},
	Year = {2009}}

@article{Williams1992,
  author  = {R. J. Williams},
  title   = {Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning},
  journal = {Machine Learning},
  volume  = {8},
  pages   = {229--256},
  year    = {1992},
}

@inproceedings{Kakade2002,
  author    = {S. Kakade},
  title     = {A Natural Policy Gradient},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {14},
  pages     = {1531--1538},
  year      = {2002},
}

@inproceedings{Kakade2001,
  author    = {S. Kakade},
  title     = {Optimizing Average Reward Using Discounted Rewards},
  booktitle = {Proceedings of the 14th Annual Conference on Computational Learning Theory},
  year      = {2001},
}

@inproceedings{Engel2006,
  author    = {Y. Engel and P. Szabo and D. Volkinshtein},
  title     = {Learning to control an octopus arm with {G}aussian process temporal difference methods},
  booktitle = {Advances in Neural Information Processing Systems 18},
  pages     = {347--354},
  year      = {2006},
}

% IEEE Transactions on Neural Systems and Rehabilitation Engineering is a
% journal, so this is an article entry; "Rehabilitation" spelling corrected.
@article{Lemay2001,
	Author = {M. A. Lemay and J. E. Galagan and N. Hogan and E. Bizzi},
	Journal = {IEEE Transactions on Neural Systems and Rehabilitation Engineering},
	Pages = {12--23},
	Title = {Modulation and vectorial summation of the spinalized frog's hindlimb end-point force produced by intraspinal electrical stimulation of the cord},
	Volume = {9},
	Year = {2001}}

@inproceedings{Bogert2011,
  author    = {A. J. van den Bogert and D. Blana and D. Heinrich},
  title     = {Implicit methods for efficient musculoskeletal simulation and optimal control},
  booktitle = {IUTAM Symposium on Human Body Dynamics},
  volume    = {2},
  pages     = {297--316},
  year      = {2011},
}

% Page range uses a double hyphen so it typesets as an en dash.
@article{Blana2009,
	Author = {D. Blana and R. F. Kirsch and E. K. Chadwick},
	Journal = {Medical and Biological Engineering and Computing},
	Pages = {533--542},
	Title = {Combined feedforward and feedback control of a redundant, nonlinear, dynamic musculoskeletal system},
	Volume = {47},
	Year = {2009}}

@inproceedings{Rohanimanesh2001,
  author    = {K. Rohanimanesh and S. Mahadevan},
  title     = {Decision-theoretic planning with concurrent temporally extended actions},
  booktitle = {Proceedings of the Seventeenth Conference on Uncertainty in Artificial Intelligence},
  pages     = {472--479},
  year      = {2001},
}

@article{Ivaldi1994,
  author  = {F. A. Mussa-Ivaldi and S. F. Giszter and E. Bizzi},
  title   = {Linear combinations of primitives in vertebrate motor control},
  journal = {Proceedings of the National Academy of Sciences},
  volume  = {91},
  pages   = {7534--7538},
  year    = {1994},
}

% Venue spelling corrected ("Uncertainty"); the proper noun in the title is
% protected from sentence-casing, following the brace convention used elsewhere in this file.
@inproceedings{Hauskrecht1998,
	Author = {M. Hauskrecht and N. Meuleau and C. Boutilier and L. P. Kaelbling and T. Dean},
	Booktitle = {Uncertainty in Artificial Intelligence},
	Pages = {220--229},
	Title = {Hierarchical Solution of {M}arkov Decision Processes using Macro-actions},
	Year = {1998}}

@article{Ivaldi2000,
  author  = {F. A. Mussa-Ivaldi and E. Bizzi},
  title   = {Motor Learning through the Combination of Primitives},
  journal = {Phil. Trans. R. Soc. B},
  volume  = {355},
  number  = {1404},
  pages   = {1755--1769},
  year    = {2000},
}

@article{Lemay2004,
  author  = {M. A. Lemay and Warren M. Grill},
  title   = {Modularity of Motor Output Evoked by Intraspinal Microstimulation in Cats},
  journal = {Journal of Neurophysiology},
  volume  = {91},
  pages   = {502--514},
  year    = {2004},
}

@techreport{Baird1993,
  author      = {L. C. Baird and A. H. Klopf},
  title       = {Reinforcement learning with high-dimensional, continuous actions},
  institution = {Wright-Patterson Air Force Base},
  number      = {WL-TR-93-1147},
  year        = {1993},
}

% The acronym in the title is braced so sentence-casing styles do not lowercase it.
@techreport{Dietterich1997,
	Author = {T. G. Dietterich},
	Institution = {Department of Computer Science, Oregon State University},
	Title = {Hierarchical reinforcement learning with the {MAXQ} value function decomposition},
	Year = {1997}}

@techreport{Baird1993b,
  author      = {L. C. Baird},
  title       = {Advantage Updating},
  institution = {Wright-Patterson Air Force Base},
  number      = {WL-TR-93-1146},
  year        = {1993},
}

% Journal name completed to match the spelling used by other entries in this
% file ("Journal of Machine Learning Research").
@article{Sallans2004,
	Author = {B. A. Sallans and G. E. Hinton},
	Journal = {Journal of Machine Learning Research},
	Pages = {1063--1088},
	Title = {Reinforcement Learning with Factored States and Actions},
	Volume = {5},
	Year = {2004}}

% Page range uses a double hyphen so it typesets as an en dash.
@article{Frank2006,
	Author = {M. J. Frank and E. D. Claus},
	Journal = {Psychological Review},
	Number = {2},
	Pages = {300--326},
	Title = {Anatomy of a decision: {S}triato-Orbitofrontal Interactions in Reinforcement Learning, Decision Making, and Reversal},
	Volume = {113},
	Year = {2006}}

@article{Daw2006,
  author  = {N. Daw and K. Doya},
  title   = {The computational neurobiology of learning and reward},
  journal = {Current Opinion in Neurobiology},
  volume  = {16},
  pages   = {199--204},
  year    = {2006},
}

% Page range uses a double hyphen so it typesets as an en dash.
@article{Schultz1998,
	Author = {W. Schultz},
	Journal = {Journal of Neurophysiology},
	Pages = {1--27},
	Title = {Predictive Reward Signal of Dopamine Neurons},
	Volume = {80},
	Year = {1998}}

@article{Baxter2001,
  author  = {J. Baxter and P. Bartlett},
  title   = {Infinite-horizon policy-gradient estimation},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {15},
  pages   = {319--350},
  year    = {2001},
}

@article{Chakravarthy2010,
  author  = {V. Chakravarthy and D. Joseph and R. Bapi},
  title   = {What do the basal ganglia do? A modeling perspective},
  journal = {Biological Cybernetics},
  volume  = {103},
  pages   = {237--253},
  year    = {2010},
}

@article{Stocco2010,
  author  = {A. Stocco and C. Lebiere and J. Anderson},
  title   = {Conditional routing of information to the cortex: A model of the basal ganglia's role in cognitive coordination},
  journal = {Psychological Review},
  volume  = {117},
  number  = {2},
  pages   = {541--574},
  year    = {2010},
}

% Volume corrected from 193 to 139 (digits were transposed; Cell had not reached
% volume 193 by 2009). The umlaut in the last author's name is restored.
@article{Chang2009,
	Author = {A. Claridge-Chang and R. Roorda and E. Vrontou and L. Sjulson and H. Li and J. Hirsh and G. Miesenb{\"o}ck},
	Journal = {Cell},
	Number = {2},
	Pages = {405--415},
	Title = {Writing memories with light-addressable reinforcement circuitry},
	Volume = {139},
	Year = {2009}}

@article{Sutton1981,
  author  = {R. Sutton and A. Barto},
  title   = {Toward a modern theory of adaptive networks: Expectation and prediction},
  journal = {Psychological Review},
  volume  = {88},
  pages   = {135--140},
  year    = {1981},
}

% Page range uses a double hyphen so it typesets as an en dash.
@article{Barto1982,
	Author = {A. Barto and R. Sutton},
	Journal = {Behavioral Brain Research},
	Pages = {221--235},
	Title = {Simulation of anticipatory responses in classical conditioning by a neuron-like adaptive element},
	Volume = {4},
	Year = {1982}}

@article{Montague1995,
  author  = {P. Montague and P. Dayan and C. Person and T. Sejnowski},
  title   = {Bee foraging in uncertain environments using predictive hebbian learning},
  journal = {Nature},
  volume  = {377},
  pages   = {725--728},
  year    = {1995},
}

@article{Doya1999,
  author  = {K. Doya},
  title   = {What are the computations of the cerebellum, the basal ganglia and the cerebral cortex?},
  journal = {Neural Networks},
  volume  = {12},
  pages   = {961--974},
  year    = {1999},
}

@article{Ludvig2008,
  author  = {E. Ludvig and R. Sutton and E. Kehoe},
  title   = {Stimulus representation and the timing of reward-prediction errors in models of the dopamine system},
  journal = {Neural Computation},
  volume  = {20},
  pages   = {3034--3035},
  year    = {2008},
}

@article{Sutton1988,
  author  = {R. Sutton},
  title   = {Learning to predict by the methods of temporal differences},
  journal = {Machine Learning},
  volume  = {3},
  pages   = {9--44},
  year    = {1988},
}

% Year corrected to 1997: it must agree with the citation key, and Science
% volume 275 appeared in 1997.
@article{Schultz1997,
	Author = {W. Schultz and P. Dayan and P. Montague},
	Journal = {Science},
	Pages = {1593--1599},
	Title = {A Neural substrate of prediction and reward},
	Volume = {275},
	Year = {1997}}

% An encyclopedia chapter is a contribution to an edited book, not a journal
% article, so the encyclopedia title moves to Booktitle. Editor and publisher are
% the Springer 2010 edition's; NOTE(review): confirm against the printed volume.
@incollection{Peters2010,
	Author = {J. Peters and J. A. Bagnell},
	Booktitle = {Encyclopedia of Machine Learning},
	Editor = {C. Sammut and G. I. Webb},
	Publisher = {Springer},
	Title = {Policy Gradient Methods},
	Year = {2010}}

@article{Peters2010b,
  author  = {J. Peters},
  title   = {Policy gradient methods},
  journal = {Scholarpedia},
  volume  = {5},
  number  = {11},
  pages   = {3698},
  year    = {2010},
}

@article{Mahadevan2009,
  author  = {S. Mahadevan},
  title   = {Learning representation and control in {M}arkov Decision Processes: New Frontiers},
  journal = {Foundations and Trends in Machine Learning},
  editor  = {M. Jordan},
  volume  = {1},
  number  = {4},
  pages   = {403--565},
  year    = {2009},
}

@article{Crick1989,
  author  = {F. H. C. Crick},
  title   = {The recent excitement about neural networks},
  journal = {Nature},
  volume  = {337},
  pages   = {129--132},
  year    = {1989},
}

@article{Bhatnagar2009,
  author  = {S. Bhatnagar and R. S. Sutton and M. Ghavamzadeh and M. Lee},
  title   = {Natural actor-critic algorithms},
  journal = {Automatica},
  volume  = {45},
  number  = {11},
  pages   = {2471--2482},
  year    = {2009},
}

@article{Zipser1988,
  author  = {D. Zipser and R. A. Andersen},
  title   = {A back propagation programmed network that simulates response properties of a subset of posterior parietal neurons},
  journal = {Nature},
  volume  = {331},
  pages   = {679--684},
  year    = {1988},
}

@inproceedings{Amari1998b,
  author    = {S. Amari and S. Douglas},
  title     = {Why Natural Gradient?},
  booktitle = {Proceedings of the 1998 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP '98)},
  volume    = {2},
  pages     = {1213--1216},
  year      = {1998},
}

@inproceedings{Rivest2005,
  author    = {F. Rivest and Y. Bengio and J. Kalaska},
  title     = {Brain inspired reinforcement learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1129--1136},
  year      = {2005},
}

@inproceedings{Morimura2009,
  author    = {T. Morimura and E. Uchibe and J. Yoshimoto and K. Doya},
  title     = {A Generalized Natural Actor-Critic Algorithm},
  booktitle = {Neural Information Processing Systems: Natural and Synthetic},
  year      = {2009},
}

% Authors must be joined with "and"; a comma made BibTeX read the field as one
% "Last, First" name. The co-author is written as "H. Yu", as in the yu2009 entry
% of this file for the same paper. The acronym in the title is protected.
@inproceedings{Bertsekas2009,
	Author = {D. Bertsekas and H. Yu},
	Booktitle = {IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning},
	Title = {Basis Function Adaptation Methods for Cost Approximation in {MDP}},
	Year = {2009}}

% Venue spelling corrected ("Processing").
@inproceedings{Mahadevan2010,
	Author = {S. Mahadevan and B. Liu},
	Booktitle = {Neural Information Processing Systems},
	Title = {Basis Construction from Power Series Expansions of Value Functions},
	Year = {2010}}

% The proper noun in the title is protected from sentence-casing, following the
% brace convention used elsewhere in this file.
@article{Bethke2008,
	Author = {B. Bethke and J. How and A. Ozdaglar},
	Journal = {Journal of Machine Learning Research},
	Title = {Kernel-Based Reinforcement Learning Using {B}ellman Residual Elimination},
	Year = {2008}}

% A second copy of the Bagnell2003 entry (Covariant policy search, IJCAI 2003)
% used to sit here. The key is already defined earlier in this file with fuller
% author initials, and BibTeX reports a repeated key as an error, so the
% redundant copy was removed.

% Removed the stray comma before the last "and"; with it BibTeX parsed
% "P. M. Pilarski," as a surname with an empty given name.
@inproceedings{Degris2012,
	Author = {T. Degris and P. M. Pilarski and R. S. Sutton},
	Booktitle = {Proceedings of the 2012 American Control Conference},
	Title = {Model-Free Reinforcement Learning with Continuous Action in Practice},
	Year = {2012}}

@article{Menache2005,
  author  = {I. Menache and S. Mannor and N. Shimkin},
  title   = {Basis function adaptation in temporal difference reinforcement learning},
  journal = {Annals of Operations Research},
  volume  = {134},
  pages   = {215--238},
  year    = {2005},
}

% Journal name spelled out to match the Barto1983 entry in this file, and the
% truncated page range expanded to full page numbers.
@article{arp,
	Author = {A. Barto and P. Anandan},
	Journal = {IEEE Transactions on Systems, Man, and Cybernetics},
	Pages = {360--375},
	Title = {Pattern Recognizing Stochastic Learning Automata},
	Volume = {15},
	Year = {1985}}

@inproceedings{Munos2003,
  author    = {R. Munos},
  title     = {Error bounds for approximate policy iteration},
  booktitle = {Machine Learning, Proceedings of the Twentieth International Conference},
  editor    = {T. Fawcett and N. Mishra},
  pages     = {560--567},
  year      = {2003},
}

@inproceedings{Lagoudakis2001,
  author    = {M. Lagoudakis and R. Parr},
  title     = {Model-Free Least-Squares Policy Iteration},
  booktitle = {Neural Information Processing Systems: Natural and Synthetic},
  pages     = {1547--1554},
  year      = {2001},
}

@inproceedings{Morimura2005,
  author    = {T. Morimura and E. Uchibe and K. Doya},
  title     = {Utilizing the natural gradient in temporal difference reinforcement learning with eligibility traces},
  booktitle = {International Symposium on Information Geometry and its Application},
  pages     = {256--263},
  year      = {2005},
}

@inproceedings{Peters2006,
  author    = {J. Peters and S. Schaal},
  title     = {Policy Gradient Methods for Robotics},
  booktitle = {Proceedings of the IEEE/RSJ International Conference on Intelligent Robots and Systems},
  year      = {2006},
}

% "M.Jordan" lacked a space, so BibTeX treated it as a single surname token.
% The booktitle was a journal abbreviation; this paper appeared in the
% proceedings of the 1987 IEEE neural networks conference (San Diego), and the
% page range is written out in full. NOTE(review): confirm the exact
% proceedings title against the original volume.
@inproceedings{Barto1987,
	Address = {San Diego, CA},
	Author = {A. Barto and M. Jordan},
	Booktitle = {Proceedings of the IEEE First Annual Conference on Neural Networks},
	Editor = {M. Caudill and C. Butler},
	Pages = {II629--II636},
	Title = {Gradient Following Without Back-Propagation in Layered Networks},
	Year = {1987}}

% Truncated page range expanded to full page numbers and the journal name
% spelled out in full.
@article{Bezdek1987,
	Author = {J. Bezdek and R. Hathaway and R. Howard and C. Wilson and M. Windham},
	Journal = {Journal of Optimization Theory and Applications},
	Number = {3},
	Pages = {471--477},
	Title = {Local Convergence Analysis of Grouped Variable Version of Coordinate Descent},
	Volume = {54},
	Year = {1987}}

% The booktitle was a journal abbreviation; this is a conference paper, so the
% proceedings title is given in full. NOTE(review): venue taken to be ICRA 2004 - confirm.
@inproceedings{Kohl2004,
	Author = {N. Kohl and P. Stone},
	Booktitle = {Proceedings of the IEEE International Conference on Robotics and Automation},
	Title = {Policy gradient reinforcement learning for fast quadrupedal locomotion},
	Year = {2004}}

@inproceedings{Thomas2011,
  author    = {P. S. Thomas and A. G. Barto},
  title     = {Conjugate {M}arkov Decision Processes},
  booktitle = {Proceedings of the Twenty-Eighth International Conference on Machine Learning},
  pages     = {137--144},
  year      = {2011},
}

% Editor initials are separated by spaces so BibTeX sees each given-name token
% and abbreviating styles keep every initial.
@incollection{Thomas2011b,
	Author = {P. S. Thomas},
	Booktitle = {Advances in Neural Information Processing Systems 24},
	Editor = {J. Shawe-Taylor and R. S. Zemel and P. Bartlett and F. C. N. Pereira and K. Q. Weinberger},
	Pages = {1944--1952},
	Title = {Policy Gradient Coagent Networks},
	Year = {2011}}

@book{SuttonBarto,
  author    = {R. S. Sutton and A. G. Barto},
  title     = {Reinforcement Learning: {A}n Introduction},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1998},
}

@book{Bellman1957,
  author    = {Richard E. Bellman},
  title     = {Dynamic Programming},
  publisher = {Rand Corporation},
  year      = {1957},
}

@book{Rumelhart1986,
  author    = {D. E. Rumelhart and J. L. McClelland},
  title     = {Parallel distributed processing. volume 1: {F}oundations},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1986},
}

@inproceedings{Schweighofer2002,
  author    = {N. Schweighofer and K. Doya},
  title     = {A biologically plausible computational model of meta-learning in reinforcement learning},
  booktitle = {32nd Annual Meeting, Society for Neuroscience},
  address   = {Orlando, USA},
  year      = {2002},
}

% Adaptive Behavior is a journal, so this is an article entry with the venue in Journal.
@article{Elfwing2008,
	Author = {S. Elfwing and E. Uchibe and K. Doya and K. Christensen},
	Journal = {Adaptive Behavior},
	Title = {Co-evolution of shaping rewards and meta-parameters in reinforcement learning},
	Year = {2008}}

@inproceedings{Precup2010,
  author    = {G. Comanici and D. Precup},
  title     = {Optimal Policy Switching Algorithms for Reinforcement Learning},
  booktitle = {AAMAS},
  year      = {2010},
}

% Month uses the standard three-letter macro (unbraced) so each bibliography
% style can format or abbreviate it.
@inproceedings{Konidaris2009,
	Author = {G. Konidaris and A. Barto},
	Booktitle = {Advances in Neural Information Processing Systems 22},
	Month = dec,
	Title = {Skill discovery in continuous reinforcement learning domains using skill chaining},
	Year = {2009}}

% Month uses the standard three-letter macro (unbraced) so each bibliography
% style can format it; the province abbreviation gets its missing final period.
@inproceedings{Simsek2008,
	Address = {Vancouver, B.C., Canada},
	Author = {O. Simsek and A. Barto},
	Booktitle = {Proceedings of the 22nd Annual Conference on Neural Information Processing Systems},
	Month = dec,
	Title = {Skill characterization based on betweenness},
	Year = {2008}}

@inproceedings{Singh09,
  author    = {S. Singh and R. Lewis and A. Barto},
  title     = {Where do rewards come from?},
  booktitle = {Proceedings of the 31st Annual Conference of the Cognitive Science Society},
  address   = {Austin, TX},
  year      = {2009},
}

% Added the missing period after the third author's initial, matching the
% other names in this entry.
@inproceedings{Ipek2008,
	Author = {E. Ipek and O. Mutlu and J. Martinez and R. Caruana},
	Booktitle = {International Symposium on Computer Architecture},
	Title = {Self-Optimizing Memory Controllers: A Reinforcement Learning Approach},
	Year = {2008}}

% Month uses the standard three-letter macro (unbraced) so each bibliography
% style can format or abbreviate it.
@inproceedings{thomasYale,
	Address = {New Haven, CT},
	Author = {P. Thomas and M. Branicky and A. van den Bogert and K. Jagodnik},
	Booktitle = {Proceedings of the Fourteenth Yale Workshop on Adaptive and Learning Systems},
	Month = jun,
	Title = {Creating a reinforcement learning controller for functional electrical stimulation of a human arm},
	Year = {2008}}

@inproceedings{thomasIAAI,
  author    = {P. S. Thomas and M. S. Branicky and A. J. van den Bogert and K. M. Jagodnik},
  title     = {Application of the actor-critic architecture to functional electrical stimulation control of a human arm},
  booktitle = {Proceedings of the Twenty-First Innovative Applications of Artificial Intelligence},
  pages     = {165--172},
  year      = {2009},
}

@inproceedings{Thomas2009,
  author    = {P. S. Thomas and M. S. Branicky and A. J. van den Bogert and K. M. Jagodnik},
  title     = {Application of the actor-critic architecture to functional electrical stimulation control of a human arm},
  booktitle = {Proceedings of the Twenty-First Innovative Applications of Artificial Intelligence},
  pages     = {165--172},
  year      = {2009},
}

@inproceedings{Konidaris2011,
  author    = {G. D. Konidaris and S. Osentoski and P. S. Thomas},
  title     = {Value Function Approximation using the {F}ourier basis},
  booktitle = {Proceedings of the Twenty-Fifth Conference on Artificial Intelligence},
  pages     = {1468--1473},
  year      = {2011},
}

@inproceedings{Sutton2000,
  author    = {R. S. Sutton and D. McAllester and S. Singh and Y. Mansour},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 12},
  pages     = {1057--1063},
  year      = {2000},
}

% Month uses the standard three-letter macro (unbraced) so each bibliography
% style can format or abbreviate it.
@mastersthesis{thomasThesis,
	Author = {P. S. Thomas},
	Month = aug,
	School = {Department of Electrical Engineering and Computer Science, Case Western Reserve University},
	Title = {A reinforcement learning controller for functional electrical stimulation of a human arm},
	Year = {2009}}

@phdthesis{Skelly2004,
  author = {M. Skelly},
  title  = {Hierarchical reinforcement learning with function approximation for adaptive control},
  school = {Case Western Reserve University},
  year   = {2004},
}

% Society name corrected: the IEEE society is "Engineering in Medicine and
% Biology" (the two words were transposed), and the conference is "of" it.
@inproceedings{Todorov2003,
	Author = {E. Todorov and Z. Ghahramani},
	Booktitle = {Proceedings of the 25th Annual International Conference of the IEEE Engineering in Medicine and Biology Society},
	Pages = {1750--1753},
	Title = {Unsupervised learning of sensory-motor primitives},
	Year = {2003}}

% Title typo corrected ("nad" to "and").
@phdthesis{OReilly1996,
	Author = {R. C. O'Reilly},
	School = {Carnegie Mellon University},
	Title = {The {LEABRA} model of neural interactions and learning in the neocortex},
	Year = {1996}}

@phdthesis{Watkins1989,
  author = {C. Watkins},
  title  = {Learning from delayed rewards},
  school = {University of Cambridge, England},
  year   = {1989},
}

% Month uses the standard three-letter macro (unbraced); "Fourier" is
% capitalized and protected, matching the Konidaris2011 entry in this file.
@techreport{Konidaris2008,
	Author = {G. Konidaris and S. Osentoski},
	Institution = {Department of Computer Science, University of Massachusetts at Amherst},
	Month = jun,
	Number = {UM-CS-2008-19},
	Title = {Value function approximation in reinforcement learning using the {F}ourier basis},
	Year = {2008}}

% Month uses the standard three-letter macro (unbraced) so each bibliography
% style can format or abbreviate it.
@techreport{Thomas2012,
	Author = {P. S. Thomas},
	Institution = {Department of Computer Science, University of Massachusetts at Amherst},
	Month = oct,
	Number = {UM-CS-2012-018},
	Title = {Bias in Natural Actor-Critic Algorithms},
	Year = {2012}}

@techreport{Marbach1998,
  author      = {P. Marbach and J. N. Tsitsiklis},
  title       = {Simulation-based optimization of {M}arkov reward processes},
  institution = {Massachusetts Institute of Technology},
  number      = {LIDS-P-2411},
  year        = {1998},
}

@techreport{Baxter1999,
  author      = {J. Baxter and P. L. Bartlett},
  title       = {Direct gradient-based reinforcement learning: {I}. gradient estimation algorithms},
  institution = {Research School of Information Sciences and Engineering, Australian National University},
  year        = {1999},
}

@article{Doya2000,
  author  = {K. Doya},
  title   = {Reinforcement learning in continuous time and space},
  journal = {Neural Computation},
  volume  = {12},
  number  = {1},
  pages   = {219--245},
  year    = {2000},
}

@article{Amari1998,
  author  = {S. Amari},
  title   = {Natural Gradient Works Efficiently in Learning},
  journal = {Neural Computation},
  volume  = {10},
  pages   = {251--276},
  year    = {1998},
}

% The acronyms in the title are braced so sentence-casing styles do not
% lowercase them.
@article{Sutton1999,
	Author = {R. Sutton and D. Precup and S. Singh},
	Journal = {Artificial Intelligence},
	Pages = {181--211},
	Title = {Between {MDPs} and semi-{MDPs}: A Framework for Temporal Abstraction in Reinforcement Learning},
	Volume = {112},
	Year = {1999}}

% Page range uses a double hyphen; the proper nouns in the title are capitalized
% and protected, following the brace convention used elsewhere in this file.
@article{Mahadevan2007,
	Author = {S. Mahadevan and M. Maggioni},
	Journal = {Journal of Machine Learning Research},
	Pages = {2169--2231},
	Title = {Proto-value functions: a {L}aplacian framework for learning representation and control in {M}arkov decision processes},
	Volume = {8},
	Year = {2007}}

% Symposium name corrected to "Adaptive Dynamic Programming", as the
% Bertsekas2009 entry in this file spells it for the same paper; the acronym in
% the title is protected.
@inproceedings{yu2009,
	Author = {H. Yu and D. Bertsekas},
	Booktitle = {Proceedings of 2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning},
	Title = {Basis function adaptation methods for cost approximation in {MDP}},
	Year = {2009}}

% The acronym in the title is braced so sentence-casing styles do not lowercase it.
@inproceedings{KathyPD,
	Address = {Philadelphia, PA},
	Author = {K. Jagodnik and A. van den Bogert},
	Booktitle = {12th Annual Conference International FES Society},
	Title = {A proportional derivative {FES} controller for planar arm movement},
	Year = {2007}}

@inproceedings{Baird1995,
  author    = {L. Baird},
  title     = {Residual algorithms: reinforcement learning with function approximation},
  booktitle = {Proceedings of the Twelfth International Conference on Machine Learning},
  year      = {1995},
}

% Month uses the standard three-letter macro (unbraced) so each bibliography
% style can format or abbreviate it.
@proceedings{NSCISC2008,
	Address = {Birmingham, Alabama},
	Month = jan,
	Organization = {National Spinal Cord Injury Center (NSCISC)},
	Title = {Spinal cord injury facts and figures at a glance},
	Year = {2008}}

@article{Peters2008,
  author  = {J. Peters and S. Schaal},
  title   = {Natural actor-critic},
  journal = {Neurocomputing},
  volume  = {71},
  pages   = {1180--1190},
  year    = {2008},
}

@article{LARS,
  author  = {B. Efron and T. Hastie and I. Johnstone and R. Tibshirani},
  title   = {Least angle regression},
  journal = {Annals of Statistics},
  volume  = {32},
  number  = {2},
  pages   = {407--499},
  year    = {2004},
}

% The second and third authors had been entered with their given names in the
% surname position; corrected to initial plus family name (T. Kondo, K. Ito).
% NOTE(review): confirm against the published author list.
@article{Izawa,
	Author = {J. Izawa and T. Kondo and K. Ito},
	Journal = {Biological Cybernetics},
	Number = {1},
	Pages = {10--22},
	Title = {Biological arm motion through reinforcement learning},
	Volume = {91},
	Year = {2004}}

% The capital in "3-D" is protected from sentence-casing, matching the
% Chadwick2009 entry in this file.
@article{McLean2003,
	Author = {S. McLean and A. Su and A. van den Bogert},
	Journal = {Journal of Biomechanical Engineering},
	Number = {6},
	Pages = {864--874},
	Title = {Development and validation of a 3-{D} model to predict knee joint loading during dynamic movement},
	Volume = {125},
	Year = {2003}}

% The journal was given as the macro "smc", which is not defined anywhere in the
% visible part of this file and would expand to nothing; the journal name is
% written out as in the Barto1983 entry. Page range uses a double hyphen.
@article{Werbos1987,
	Author = {P. J. Werbos},
	Journal = {IEEE Transactions on Systems, Man, and Cybernetics},
	Pages = {7--20},
	Title = {Building and Understanding Adaptive Systems: {A} Statistical/Numerical Approach to Factory Automation and Brain Research},
	Volume = {17},
	Year = {1987}}

@book{Bertsekas1996,
  author    = {D. P. Bertsekas and J. N. Tsitsiklis},
  title     = {Neuro-{D}ynamic Programming},
  publisher = {Athena Scientific},
  address   = {Belmont, MA},
  year      = {1996},
}

% Page range uses a double hyphen so it typesets as an en dash.
@incollection{Barto1995,
	Address = {Cambridge, MA},
	Author = {A. G. Barto},
	Booktitle = {Models of Information Processing in the Basal Ganglia},
	Editor = {J. C. Houk and J. L. Davis and D. G. Beiser},
	Pages = {215--232},
	Publisher = {MIT Press},
	Title = {Adaptive Critics and the Basal Ganglia},
	Year = {1995}}

% Page range uses a double hyphen; the eponym in the title is protected from
% sentence-casing.
@article{Montague1996,
	Author = {P. R. Montague and P. Dayan and T. J. Sejnowski},
	Journal = {Journal of Neuroscience},
	Pages = {1936--1947},
	Title = {A Framework for Mesencephalic Dopamine Systems Based on Predictive {H}ebbian Learning},
	Volume = {16},
	Year = {1996}}

@inproceedings{Bagnell2001,
  author    = {Bagnell, J. A. and Schneider, J.},
  title     = {Autonomous helicopter control using reinforcement learning policy search methods},
  booktitle = {Proceedings of the International Conference on Robotics and Automation},
  year      = {2001},
}

%% "Van Roy" is written in "Last, First" form: a capitalised "Van" is not
%% recognised as a von part, so "B. Van Roy" would yield the surname "Roy".
@inproceedings{Yan2005,
	Author = {X. Yan and P. Diaconis and P. Rusmevichientong and Van Roy, B.},
	Booktitle = {Advances in Neural Information Processing Systems},
	Title = {Solitaire: Man Versus Machine},
	Year = {2005}}

%% Given names are stored as initials, consistent with the rest of the file.
@inproceedings{Schaeffer2001,
	Author = {J. Schaeffer and M. Hlynka and V. Jussila},
	Booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence},
	Title = {Temporal difference learning applied to a high-performance game-playing program},
	Year = {2001}}

@inproceedings{Baxter1998,
  author    = {Baxter, J. and Tridgell, A. and Weaver, L.},
  title     = {Knight{C}ap: {A} chess program that learns by combining {TD$(\lambda)$} with minimax search},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  pages     = {28--36},
  year      = {1998},
}

@inproceedings{Ng2004,
  author    = {Ng, A. and Kim, J. and Jordan, M. and Sastry, S.},
  title     = {Autonomous helicopter flight via reinforcement learning},
  booktitle = {Advances in Neural Information Processing Systems 17},
  year      = {2004},
}

%% Communications of the ACM is a journal, so this is an article entry with
%% the venue in Journal; "TD-Gammon" is braced as a whole to keep its casing.
@article{Tesauro1995,
	Author = {G. Tesauro},
	Journal = {Communications of the ACM},
	Number = {3},
	Pages = {58--68},
	Title = {Temporal Difference Learning and {TD-Gammon}},
	Volume = {38},
	Year = {1995}}

@article{Pineau2009,
  author  = {Pineau, J. and Guez, A. and Vincent, R. and Panuccio, G. and Avoli, M.},
  title   = {Treating epilepsy via adaptive neurostimulation: {A} reinforcement learning approach},
  journal = {International Journal of Neural Systems},
  volume  = {19},
  number  = {4},
  pages   = {227--240},
  year    = {2009},
}

@inproceedings{Moore2010,
  author    = {Moore, B. and Panousis, P. and Kulkarni, V. and Pyeatt, L. and Doufas, A.},
  title     = {Reinforcement Learning for Closed-Loop Propofol Anesthesia: A Human Volunteer Study},
  booktitle = {Innovative Applications of Artificial Intelligence},
  pages     = {1807--1813},
  year      = {2010},
}

%% Manuscript under review: the submission status is recorded in Note rather
%% than being placed in the Journal field.
@unpublished{Bogert2012,
	Author = {A. J. van den Bogert and S. Samorezov and B. L. Davis and W. A. Smith},
	Note = {Submitted},
	Title = {Modeling and Optimal Control of an Energy-Storing Prosthetic Knee},
	Year = {2012}}

@article{Argall2009,
  author  = {Argall, B. D. and Chernova, S. and Veloso, M. and Browning, B.},
  title   = {A survey of robot learning from demonstration},
  journal = {Robotics and Autonomous Systems},
  volume  = {57},
  pages   = {459--483},
  year    = {2009},
}

@inproceedings{McGovern2001,
  author    = {McGovern, A. and Barto, A. G.},
  title     = {Automatic discovery of subgoals in reinforcement learning using diverse density},
  booktitle = {Proceedings of the Eighteenth International Conference on Machine Learning},
  pages     = {361--368},
  year      = {2001},
}

@inproceedings{Maei2010,
  author    = {Maei, H. R. and Sutton, R. S.},
  title     = {{GQ$(\lambda)$}: {A} general gradient algorithm for temporal-difference prediction learning with eligibility traces},
  booktitle = {Proceedings of the Third Conference on Artificial General Intelligence},
  year      = {2010},
}

%% Journal publication (volume, number and pages given), hence an article entry.
@article{Konidaris2012,
	Author = {G. D. Konidaris and S. R. Kuindersma and R. A. Grupen and A. G. Barto},
	Journal = {The International Journal of Robotics Research},
	Number = {3},
	Pages = {360--375},
	Title = {Robot learning from demonstration by constructing skill trees},
	Volume = {31},
	Year = {2012}}

@inproceedings{Niekum2011,
  author    = {Niekum, S. and Barto, A. G.},
  title     = {Clustering via {D}irichlet process mixture models for portable skill discovery},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2011},
}

@inproceedings{Abbeel2004,
  author    = {Abbeel, P. and Ng, A. Y.},
  title     = {Apprenticeship learning via inverse reinforcement learning},
  booktitle = {Proceedings of the Twenty-First International Conference on Machine Learning},
  year      = {2004},
}

@inproceedings{Ng2000,
  author    = {Ng, A. Y. and Russell, S.},
  title     = {Algorithms for Inverse Reinforcement Learning},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
  year      = {2000},
}

@inproceedings{Ng1999,
  author    = {Ng, A. Y. and Harada, D. and Russell, S.},
  title     = {Policy invariance under reward transformations: {T}heory and application to reward shaping},
  booktitle = {Proceedings of the Sixteenth International Conference on Machine Learning},
  year      = {1999},
}

@inproceedings{Ijspeert2003,
  author    = {Ijspeert, A. J. and Nakanishi, J. and Schaal, S.},
  title     = {Learning attractor landscapes for learning motor primitives},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1523--1530},
  year      = {2003},
}

@inproceedings{Schaal2003,
  author    = {Schaal, S.},
  title     = {Dynamic movement primitives: {A} framework for motor control in humans and humanoid robotics},
  booktitle = {Proceedings of the 2nd International Symposium on Adaptive Motion of Animals and Machines},
  year      = {2003},
}
